---
title: "Opportunistic Climate Adaptation and Public Support for Glacially-Derived Sand Extraction in Greenland"
authors: "Mette Bendixen, Rasmus Leander Nielsen, Jane Lund Plesner, & Kelton Minor"
code correspondence: "kelton dot minor at gmail dot com"
date: "4/05/2021"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```


```{r}
#load packages
library(sf)
library(lwgeom)
library(fst)
library(readr)
library(webshot)
library(vistime)
library(plotly)
library(bit64) 
library(ggthemes) 
library(ggmap) 
library(ggplot2) 
library(Hmisc) 
library(RColorBrewer) 
library(grid) 
library(MASS) 
library(gplots) 
library(stargazer) 
library(lubridate) 
library(zoo) 
library(reshape2) 
library(sp) 
library(raster) 
library(maptools) 
library(rgeos) 
library(rgdal) 
library(RCurl) 
library(plyr) 
library(dplyr)
library(data.table) 
library(foreign) 
library(parallel) 
library(foreach) 
library(doMC) 
library(caTools) 
library(ncdf4) 
library(stringr) 
library(knitr) 
library(pander) 
library(rasterVis) 
library(aod)
library(viridis) 
library(readxl)
library(SDaA)
library(lutz)
library(stringdist)
library(humidity)
library(survey)
library(modelsummary)
library(flextable)
```


#NOTE: 
#Data Structuring Code Has Been Commented Out* 
#Folder contains model output data (Run Fig Plotting Code Below)
#*raw respondent-level data cannot be shared to preserve privacy, please contact authors with further questions or suggestions for further analyses

#Survey data structuring and post-stratification
```{r}
# #Load data
# #Greenland Dec 2020 - Jan 2021 Climate Adaptation HSA survey data
# #Load Greenland HS Analyse 2020 Climate Adaptation survey data
# dat<-fread("/Home/Analysis/klimasand_2.csv", header = T)
# #Load Statistics Greenland population data for weights
# pop<-fread("/Home/Analysis/2020_District_Population.csv", header = T)
# #Create srs survey object
# dat.d <- svydesign(id = ~1, data = dat, fpc = ~N)
# 
# #Post-stratification of survey data: Raking by Statistics Greenland 2020 Population Demographics
# #1) Post-stratify by Region x Town/Settlement Population Margins
#   #Group pop data to Region_x_Town/Settlement
#   pop_strata_region <- pop[,.(Freq=sum(adult_population_2020,na.rm=T)),by=.(Region,Town_Settlement)]
#   
#   #Prepare for raking
#   setorder(pop_strata_region,Town_Settlement)
#   pop_strata_region[,Freq:=as.numeric(Freq)]
#   pop_strata_region
#   
#   #Add Greenland Age and Self Identified Gender Population Margins
#   #Group pop data to age group, gender
#   pop_strata_agender <-pop[,.(Freq=sum(adult_population_2020,na.rm=T)),by=.(Age_Group,Gender_Group)]
#   
#   #Prepare for raking
#   setorder(pop_strata_agender,Age_Group,Gender_Group)
#   pop_strata_agender[,Freq:=as.numeric(Freq)]
#   pop_strata_agender
# 
# #Rake by Region x Town/Settlement & Gender x Age Population Margins
# dat.d <- rake(dat.d, list(~Region+Town_Settlement,~Age_Group+Gender_Group), list(pop_strata_region,pop_strata_agender))
# 
# #Create new sampling weights column
# dat[,wts:=weights(dat.d)]
# 
# #Confirm post stratification sum 
# sum(weights(dat.d))
# 
# #Compare genderxage group sample strata weights to true population strata
# test<-dat[,.(wts=sum(wts)),by=.(Age_Group,Gender_Group)]
# setorder(test,Age_Group,Gender_Group)
# setorder(pop_strata_agender,Age_Group,Gender_Group)
# test
# pop_strata_agender
# 
# 
# #Compute demographic table comparing marginal splits pre and post weighting
# #specify demographics to summarize
# #"Age_Group","Gender_Group","GL_identity","Region","Town_Settlement"
# 
# #Adult sample size
# #dat[,.N]
# sample_total<-939
# 
# #Adult population total from 2020 Statistics Greenland
# #pop_strata_region[,sum(Freq)]
# pop_total<-37077
# 
# #compute unweighted total, unweighted percentage, weighted percentage 
# d1<-dat[,.(total=.N, pct = (.N/sample_total), pct.wt=(sum(wts)/pop_total)),by=.(Age_Group)]
# setnames(d1,"Age_Group","Demographic_Group")
# d2<-dat[,.(total=.N, pct = (.N/sample_total), pct.wt=(sum(wts)/pop_total)),by=.(Gender_Group)]
# setnames(d2,"Gender_Group","Demographic_Group")
# d3<-dat[,.(total=.N, pct = (.N/sample_total), pct.wt=(sum(wts)/pop_total)),by=.(GL_identity)]
# setnames(d3,"GL_identity","Demographic_Group")
# d4<-dat[,.(total=.N, pct = (.N/sample_total), pct.wt=(sum(wts)/pop_total)),by=.(Region)]
# setnames(d4,"Region","Demographic_Group")
# d5<-dat[,.(total=.N, pct = (.N/sample_total), pct.wt=(sum(wts)/pop_total)),by=.(Town_Settlement)]
# setnames(d5,"Town_Settlement","Demographic_Group")
# new<-list(d1,d2,d3,d4,d5)
# demo_table<-rbindlist(new,use.names=T,fill=T,idcol ="Demographic_Group")
# demo_table[,pct:=round(pct*100,1)]
# demo_table[,pct.wt:=round(pct.wt*100,1)]
# demo_table
# 
# #compute the weighted mean of each survey item for the population (returns estimated population proportions for multiple choice factors)
# cols<- c("S7_sand","S8_vigfokus","S9_samsand") #survey items
# pathdir <-"/Home/Analysis/Replication_Data/" #directory for writing and reading files
# 
# #for each variable (survey item) estimate population response distribution
# for (var in cols) {
#   gmean <- svymean(as.formula(paste0( "~" , var )), design = dat.d, na.rm=T)
# 
#   #population plot
#   gmean_plot <- as.data.frame(gmean, row.names = TRUE)
#   setDT(gmean_plot, keep.rownames = "answers")
#   #save data
#   write.csv(gmean_plot,file=paste0(pathdir,var,"_","gmean.csv"))
# }
```


#Fig 1 National Survey Response Summary Estimates (Run Plotting Code)
#Plot national descriptive statistics for climate and sand survey primary outcomes
```{r}
#set path to data (replace with your directory)
pathdir <-"/Home/Analysis/Replication_Data/"

#Create weighted plots for each item w/ confidence intervals
cols<- c("S7_sand","S8_vigfokus","S9_samsand")
for (var in cols) {
  #load data 
  gmean_plot<-fread(paste0(pathdir,var,"_","gmean.csv"),header=T)
  gmean_plot
  gmean_plot$answers <- gsub(paste0(var), "", gmean_plot$answers) 
  gmean_plot$percent <- gmean_plot$mean*100 #convert to %
  gmean_plot$SE <- gmean_plot$SE*100
  gmean_plot$percent <- round(gmean_plot$percent, digits=0) #round
  gpop<-ggplot(data=gmean_plot, aes(x=answers, y=percent)) + 
  geom_bar(stat="identity", fill= viridis(nrow(gmean_plot), option = "D", direction = -1, begin = .2, end = 1), position="dodge", color="white", width=0.8) + 
  geom_errorbar(aes(ymin=percent-SE*1.96, ymax=percent+SE*1.96), width=.1, size = .8, alpha = .2) +
  geom_text(aes(label=percent), hjust=-1, position=position_dodge(width=0.8), size=3) +
  labs(x=NULL) +
  scale_y_continuous(breaks=seq(0, 100, 25)) + expand_limits(y=c(0, 100)) +
  theme(plot.margin = unit(c(1, 2, 0, 0), "cm")) +
  coord_flip() + theme(panel.background = element_blank()) +
  ggtitle(paste0(var), subtitle="Climate Sand Survey")
  gpop
  
  write.csv(gmean_plot, file = paste0(pathdir,var,"_plot.csv"))
  message(gpop)
  ggsave(paste0(pathdir,var,"_plot.pdf"))
  ggsave(paste0(pathdir,var,"_plot.png"))

}

gc()
```

#Fig 2 Regional Response Plots (Run Plotting Code)
#Note Greenland map plotted in QGIS + Adobe Illustrator
```{r}
#Create weighted grouped barplots by Greenland municipal region w/ confidence intervals

cols <- c("S7_sand") #survey item to plot (sand extraction preferences)
cols_2 <- c("Region") #factor for subgroup analysis (region)

# Data Processing Code
# for (var in cols){
#   for (var_2 in cols_2){
#   gmean <- svyby(as.formula(paste0( "~" , var)), as.formula(paste0( "~" , var_2)), design = dat.d, na.rm=T, svymean, vartype=c("se"))
#   
#   #population grouped bar plot (grouped by var2)
#   #format output to DT
#   gmean_plot <- as.data.frame(gmean)
#   setDT(gmean_plot)
#   write.csv(gmean_plot,file=paste0(pathdir,var,"_","gmean_by_region_data.csv"))#save data
#   }}
  
#Data Plotting Code  
  for (var in cols){
  for (var_2 in cols_2){
  gmean_plot<-fread(paste0(pathdir,var,"_","gmean_by_region_data.csv"),header=T)#load data
  
  #convert cols to percent
  cols <- names(gmean_plot)
  cols <- gsub(paste0(var), "", cols)
  setnames(gmean_plot, old=names(gmean_plot), new = cols)
  cols<-cols[-(1:2)] #*** don't include ID or region name col in % conversion
  gmean_plot=gmean_plot[,(cols):=lapply(.SD, function(x) x*100), .SDcols = cols]
  gmean_plot=gmean_plot[,(cols):=lapply(.SD,function(x) round(x, digits=0)), .SDcols = cols]
  
  #convert to long format for plotting with CIs
  #extract standard error cols ***NOTE: extracts all cols with periods and "se" in col 
  gmean_se <- gmean_plot[,.SD,.SDcols = (names(gmean_plot) %like% "\\." & names(gmean_plot) %like% "se") | names(gmean_plot) == var_2]
  #extract standard error cols ***NOTE: extracts all cols without periods and "se" in col 
  gmean_names <- names(gmean_plot[,.SD,.SDcols = !names(gmean_se)])
  gmean_se <- melt(gmean_se, id.vars= var_2, variable.name = "answers")
  setnames(gmean_se, "value", "SE")
  gmean_plot <- melt(gmean_plot, id.vars= var_2, measure.vars = gmean_names, variable.name = "answers")
  setnames(gmean_plot, "value", "percent")
  gmean_se[,answers:=gsub("se\\.","",answers)]
  gmean_plot[,answers:=as.character(answers)]
  #merge reshaped cols 
  nrow(gmean_se)
  nrow(gmean_plot)
  gmean_plot<-merge(gmean_plot,gmean_se,by=c(var_2,"answers"))
  nrow(gmean_plot)
  
  #grouped bar plot
  gpop<-ggplot(data=gmean_plot, aes_string(x=var_2, y="percent", fill="answers")) + 
  geom_bar(position="dodge", stat="identity", color="white", width=0.8) + 
  geom_errorbar(aes(ymin=percent-SE*1.96, ymax=percent+SE*1.96), position=position_dodge(.8), width=.1, size = .8, alpha = .4) +
  geom_text(aes(label=percent), hjust=-1, position=position_dodge(width=0.8), size=3) +
  labs(x=NULL) +
  scale_y_continuous(breaks=seq(0, 100, 25)) + expand_limits(y=c(0, 100)) +
  theme(plot.margin = unit(c(1, 2, 0, 0), "cm")) +
  coord_flip() + theme(panel.background = element_blank()) +
  ggtitle(paste0(var), subtitle=paste0(var, "_", var_2))
  
  gpop
  
  #save plots 
  write.csv(gmean_plot, file = paste0(pathdir,var,"_",var_2,"_plot.csv"))
  message(gpop)
  ggsave(paste0(pathdir,var,"_",var_2,"_plot.pdf"))
  ggsave(paste0(pathdir,var,"_",var_2,"_plot.png"))
}}

gc()
gpop
```

#Logit regression analysis (data processing code for summary table)
#Exported table in folder
```{r}
# #Run logit regressions and output model summary tables
# #A. Support/Opposition for sand mining adaptation
# 
# #Output combined model summary tables
# models <- list(
#   "1A Sand Strong Support - Univariate" = svyglm(factor(sand_strong_favor)~ AACC, family=quasibinomial, design=dat.d, na.action=na.omit),
#   "1B Sand Strong Support - w/Geographic Controls" = svyglm(factor(sand_strong_favor)~ AACC + Region, family=quasibinomial, design=dat.d, na.action=na.omit),
#   "1C Sand Strong Support - w/Geographic + Demographic Controls" = svyglm(factor(sand_strong_favor)~Region + AACC + Age_Group + Gender_Group, family=quasibinomial, design=dat.d, na.action=na.omit),
#   "2A Sand Environmental Focus - Univariate" = svyglm(factor(sand_prefer_enviro_assessment)~ AACC, family=quasibinomial, design=dat.d, na.action=na.omit),
#   "2B Sand Environmental Focus - w/Geographic Controls" = svyglm(factor(sand_prefer_enviro_assessment)~ AACC + Region, family=quasibinomial, design=dat.d, na.action=na.omit),
#   "2C Sand Environmental Focus - w/Geographic + Demographic Controls" = svyglm(factor(sand_prefer_enviro_assessment)~Region + AACC + Age_Group + Gender_Group, family=quasibinomial, design=dat.d, na.action=na.omit),
#   "3A Sand Extra-National Involvement - Univariate" = svyglm(factor(sand_foreign_cooperation)~ AACC, family=quasibinomial, design=dat.d, na.action=na.omit),
#   "3B Sand Extra-National Involvement - w/Geographic Controls" = svyglm(factor(sand_foreign_cooperation)~ AACC + Region, family=quasibinomial, design=dat.d, na.action=na.omit),
#   "3C Sand Extra-National Involvement - w/Geographic + Demographic Controls" = svyglm(factor(sand_foreign_cooperation)~Region + AACC + Age_Group + Gender_Group, family=quasibinomial, design=dat.d, na.action=na.omit)
# )
# #print table with CIs
# modelsummary(models, exponentiate = TRUE, fmt = 2,
#              estimate = c("{estimate} ({conf.low}, {conf.high}){stars}"),
#              notes = list('+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001'),
#              statistic = NULL)
# 
# #print table with ses
# modelsummary(models, exponentiate = TRUE, fmt = 2,
#              estimate = c("{estimate} ({std.error}){stars}"),
#              notes = list('+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001'),
#              statistic = NULL)
# #save table
# modelsummary(models, exponentiate = TRUE, fmt = 2,
#              estimate = c("{estimate} ({std.error}){stars}"),
#              notes = list('+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001'),
#              statistic = NULL,
#              output = "model_output_table.docx"
#              )
# getwd()
```

#Fig 3 - OR Plots (Run plotting code)
#Formatted in Adobe Illustrator
```{r}
#Compare opportunistic adaptation preferences for those aware vs. unaware of human-caused climate change

pathdir <-"/Home/Analysis/Replication_Data/" #directory for reading and writing files

#Plot odds ratios
#specify model outcomes for multi-model odds ratio plot
vars <- c("sand_strong_favor","sand_prefer_enviro_assessment","sand_foreign_cooperation")

#create list for storing model outcome output
model_store<-vector("list",length=length(vars)) #combine rows

# #Data processing code
# #for each var, extract awareness of anthropogenic climate change Odds Ratio value 
# for (var in vars) { 
#   #reduced univariate model
#   logit<-svyglm(as.formula(paste0(var, "~ AACC")), family=quasibinomial, design=dat.d, na.action=na.omit)
#   message(paste0(summary(logit)))
#   logit<-exp(cbind(OR = coef(logit), confint(logit)))
#   logit<-as.data.frame(logit)
#   setDT(logit, keep.rownames = "Outcome")
#   #save logit coefficients for plotting
#   write.csv(logit,file=paste0(pathdir,var,"_","logit.csv"))
#   
#   #add geographic controls
#   logit_2<-svyglm(as.formula(paste0(var, "~ AACC + Region")), family=quasibinomial, design=dat.d, na.action=na.omit)
#   message(paste0(summary(logit_2)))
#   logit_2<-exp(cbind(OR = coef(logit_2), confint(logit_2)))
#   logit_2<-as.data.frame(logit_2)
#   setDT(logit_2, keep.rownames = "Outcome")
#   write.csv(logit_2,file=paste0(pathdir,var,"_","geo_logit.csv"))
#   
#   #add demographic controls
#   logit_3<-svyglm(as.formula(paste0(var, "~ AACC + Region + Age_Group + Gender_Group")), family=quasibinomial, design=dat.d, na.action=na.omit)
#   message(paste0(summary(logit_3)))
#   logit_3<-exp(cbind(OR = coef(logit_3), confint(logit_3)))
#   logit_3<-as.data.frame(logit_3)
#   setDT(logit_3, keep.rownames = "Outcome")
#   write.csv(logit_3,file=paste0(pathdir,var,"_","demo_geo_logit.csv"))
#   
# }

#Data plotting code
#load processed data 
for (var in vars) { 
  #reduced univariate model
  #load logit ORs for plotting
  logit<-fread(paste0(pathdir,var,"_","logit.csv"),header=T)
  setnames(logit,old=c("2.5 %","97.5 %"),new=c("lower","upper")) #Prep CIs
  logit<-logit[Outcome=="AACC1"] #select AACC
  logit[Outcome=="AACC1",Outcome:=paste0("Red_AACC1_",var)] #add var and model to outcome
  logit[,group:=paste0(var)] #add variable grouping var
  
  #add geographic controls
  logit_2<-fread(paste0(pathdir,var,"_","geo_logit.csv"),header=T)
  setnames(logit_2,old=c("2.5 %","97.5 %"),new=c("lower","upper")) #Prep CIs
  logit_2<-logit_2[Outcome=="AACC1"] #select AACC
  logit_2[Outcome=="AACC1",Outcome:=paste0("Geo_AACC1_",var)] #add var and model to outcome
  logit_2[,group:=paste0(var)] #add variable grouping var
  
  #add demographic controls
  logit_3<-fread(paste0(pathdir,var,"_","demo_geo_logit.csv"),header=T)
  setnames(logit_3,old=c("2.5 %","97.5 %"),new=c("lower","upper")) #Prep CIs
  logit_3<-logit_3[Outcome=="AACC1"] #select AACC
  logit_3[Outcome=="AACC1",Outcome:=paste0("GeoDemo_AACC1_",var)] #add var and model to outcome
  logit_3[,group:=paste0(var)] #add variable grouping var
  
  logit<-rbindlist(list(logit,logit_2,logit_3),use.names = T, fill = T)
  model_store[[var]]<- logit #store output in list
}

logit <- rbindlist(model_store, use.names = T, fill = T)
logit[,rnum:=.I] #add row number to order outcomes on x axis
logit[,Outcome:=paste0(rnum,"_",Outcome)]
logit

#rename group col
#line + dot plot
logit_plot<-ggplot(logit) +
  geom_pointrange(aes(x=Outcome, y=OR, ymin=lower, ymax=upper, color=factor(group)), alpha=0.9, size=.75) +
  theme(plot.margin = unit(c(1, 2, 0, 0), "cm")) +
  theme(panel.background = element_blank()) + scale_y_continuous(breaks=seq(0, 2.25, .25)) + expand_limits(y=c(0, 2.25)) + coord_flip() #+ ggtitle(paste0(var), subtitle=paste0(var, "_", var_2))

  #save plots 
  write.csv(logit, file = paste0(pathdir,"logit_plot_AACC.csv"))
  ggsave(paste0(pathdir,"logit_plot_AACC.pdf"))
  ggsave(paste0(pathdir,"logit_plot_AACC.png"))
  
  logit_plot
```




